import numpy as np
import pandas as pd

# Configure pandas console output so printed frames are shown without truncation.
pd.options.display.max_columns = None   # show every column, no ellipsis
pd.options.display.max_rows = None      # show every row
pd.options.display.max_colwidth = None  # show long cell contents in full
pd.options.display.width = 2000         # wide output width to avoid line wrapping

from sklearn.cluster import KMeans

# Load the order data set from disk.
data = pd.read_csv('data/order.csv')

# Only the products each user bought matter for clustering, i.e. the
# Food%, Fresh%, Drinks%, Home%, Beauty%, Health%, Baby% and Pets% columns.
# They are selected by position as the last eight columns of the frame.
# NOTE(review): assumes the CSV column order never changes — confirm.
x = data.iloc[:, -8:]

# Fit a 5-cluster k-means model; the fixed random_state seeds the
# k-means++ initialisation.
# NOTE(review): n_init is left at the library default, which differs
# between scikit-learn releases — consider pinning it explicitly.
kmeans = KMeans(init='k-means++', n_clusters=5, random_state=2)
kmeans.fit(x)
y_kmeans = kmeans.labels_  # cluster index assigned to each row of x

# Attach each row's cluster index to the full frame.
data.loc[:, 'label'] = y_kmeans

# Print descriptive statistics of every column, grouped by cluster.
summary = data.groupby('label').describe()
print(summary)